Part 1: Descriptive analysis

Unique French names

Evolution over time

# Convert the national `year` column to a Date set to 31 December of that year.
# The conversion is skipped when the column is already a Date, so re-running
# this chunk no longer turns every value into NA.
# Values that are not a parseable year become NA.
if (!inherits(data_nat_clean$year, "Date")) {
  data_nat_clean$year <- as.Date(
    paste0(data_nat_clean$year, "-12-31"),
    format = "%Y-%m-%d"
  )
}

# For every year, count how many different first names occur
# (result columns: year, n_names).
distinct_names <- data_nat_clean %>%
  group_by(year) %>%
  summarise(n_names = n_distinct(firstname))

# Line chart of the yearly number of distinct first names, with a centred title.
plot_distinct_names <- ggplot(distinct_names, aes(x = year, y = n_names)) +
  geom_line(size = 1.2, color = "blue") +
  labs(
    title = "Number of French unique names from 1900 to 2018",
    x = "Year",
    y = "Number of unique names"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

# Print the chart.
plot_distinct_names

Difference from one year to the next over time

Part 2

# Yearly counts of the first names ZINEDINE, BIXENTE and YOURI, with a dashed
# marker and a label at 1998-01-01.
zinedine <- data_nat_clean # %>% filter(year(year) > 1993 & year(year) < 2003)
# zinedine$year <- as.factor(zinedine$year)
zinedine1 <- zinedine %>%
  filter(firstname %in% c("ZINEDINE", "BIXENTE", "YOURI"))

graph <- ggplot(zinedine1, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years for football player in 1998") +
  xlab("Year") + ylab("Number of name") +
  # The intercept is a constant, so it is passed outside aes(): the line is
  # drawn once instead of once per row of the inherited plot data.
  geom_vline(
    xintercept = as.numeric(as.Date("1998-01-01")),
    linetype = "longdash"
  ) +
  # annotate() draws a single label; geom_label() with constant values would
  # repeat (and overplot) the label for every row of the plot data.
  annotate(
    "label",
    x = as.Date("1998-01-01"), y = 180,
    label = "1998 Football world cup", colour = "black"
  )
graph

#CM18 <- data_nat_clean %>% filter(year(year) > 2010 )
#CM18 <- CM18  %>% filter(firstname == "ANTOINE"  | firstname=="KYLIAN" | firstname=="BLAISE" | firstname=="BENJAMIN" | firstname=="SAMUEL" | firstname=="HUGO")
#graph <- ggplot(CM18, aes(x = year, y = number, colour = firstname))+ 
 # geom_line(size=1.5) + ggtitle("Plot of number of name by years for football player IN 2018") +
  #xlab("Year") + ylab("Number of name")
#graph

# Yearly counts of the first names BRAN, SANSA and DAENERYS from 2005 onwards,
# with a dashed marker and a label at 2011-01-01.
got <- data_nat_clean %>%
  filter(firstname %in% c("BRAN", "SANSA", "DAENERYS"))
got <- got %>% filter(year(year) > 2004)

graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with Game of throne") +
  xlab("Year") + ylab("Number of name") +
  # The intercept is a constant, so it is passed outside aes(): the line is
  # drawn once instead of once per row of the inherited plot data.
  geom_vline(
    xintercept = as.numeric(as.Date("2011-01-01")),
    linetype = "longdash"
  ) +
  # annotate() draws a single label; geom_label() with constant values would
  # repeat (and overplot) the label for every row of the plot data.
  annotate(
    "label",
    x = as.Date("2011-01-01"), y = 20,
    label = "Game of throne, season 1", colour = "black"
  )
graph

# Yearly counts (after 1990) of first names associated with films:
# ARWEN (rows with sex == "2" only), NEO, BELLA and ANAKIN.
arwen <- data_nat_clean %>% filter(firstname == "ARWEN" & sex == "2")
cinema <- data_nat_clean %>%
  filter(firstname %in% c("NEO", "BELLA", "ANAKIN"))
cinema <- bind_rows(arwen, cinema)
cinema <- cinema %>% filter(year(year) > 1990)

graph <- ggplot(cinema, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  ggtitle("Plot of number of name by years in link with the cinema") +
  xlab("Year") + ylab("Number of name") +
  # Constant intercepts are passed outside aes() so that each marker is drawn
  # once instead of once per row of the inherited plot data.
  geom_vline(
    xintercept = as.numeric(as.Date(c("1999-01-01", "2009-01-01", "2001-01-01"))),
    linetype = "longdash"
  ) +
  # annotate() draws each label exactly once; geom_label() with constant values
  # would repeat (and overplot) it for every row of the plot data. The labels
  # are added after the markers so that the dashed lines do not cross them.
  annotate(
    "label",
    x = as.Date(c("1999-01-01", "2009-01-01", "1999-01-01", "2001-01-01")),
    y = c(25, 105, 100, 175),
    label = c(
      "Matrix", "Twilight", "Star Wars: Episode I ", "The Lord of the rings "
    ),
    colour = "black", size = 3
  )
graph

# Yearly counts between 1910 and 1950 of first names linked to the world wars:
# ADOLPHE (sex == "1"), VICTOIRE (sex == "2"), JOFFRE, JOFFRETTE and ADOLPHINE.
adolphe <- data_nat_clean %>% filter(firstname == "ADOLPHE" & sex == "1")
adolphe <- adolphe %>% filter(year(year) > 1909 & year(year) < 1951)
victoire <- data_nat_clean %>% filter(firstname == "VICTOIRE" & sex == "2")
victoire <- victoire %>% filter(year(year) > 1909 & year(year) < 1951)

ww <- data_nat_clean %>%
  filter(firstname %in% c("JOFFRE", "JOFFRETTE", "ADOLPHINE"))
ww <- ww %>% filter(year(year) > 1909 & year(year) < 1951)
ww <- bind_rows(ww, adolphe, victoire)

graph <- ggplot(ww, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1) +
  ggtitle("Plot of number of name by years in link with the world wars") +
  xlab("Year") + ylab("Number of name") +
  # Constant intercepts are passed outside aes() so that each marker is drawn
  # once instead of once per row of the inherited plot data.
  geom_vline(
    xintercept = as.numeric(
      as.Date(c("1914-01-01", "1921-01-01", "1918-01-01", "1945-01-01"))
    ),
    linetype = "longdash"
  ) +
  # annotate() draws each label exactly once; geom_label() with constant values
  # would repeat (and overplot) it for every row of the plot data. The labels
  # are added after the markers so that the dashed lines do not cross them.
  annotate(
    "label",
    x = as.Date(c("1914-01-01", "1921-01-01", "1918-01-01", "1945-01-01")),
    y = c(0, 550, 500, 200),
    label = c(
      "Marne's Battle",
      "Hitler leader of the NSDAP",
      "End of the World War I",
      # Was "End of the World II": the word "War" was missing.
      "End of the World War II"
    ),
    colour = "black", size = 2.5
  )
graph

# Yearly counts of the first names FELICIE, ROXANNE, OSCAR and LOLITA.
# NOTE(review): this overwrites the `got` object built for the Game of Thrones
# plot; the name is kept so that any later use of `got` is unaffected.
got <- data_nat_clean %>%
  filter(firstname %in% c("FELICIE", "ROXANNE", "OSCAR", "LOLITA"))
#got <- got %>% filter(year(year) > 2004 )
graph <- ggplot(got, aes(x = year, y = number, colour = firstname)) +
  geom_line(size = 1.5) +
  # The previous title was copy-pasted from the Game of Thrones plot and did
  # not describe the names shown here.
  ggtitle("Plot of number of name by years for FELICIE, ROXANNE, OSCAR and LOLITA") +
  xlab("Year") + ylab("Number of name")
graph

# Convert the departmental `year` column to a Date set to 31 December of that
# year. The conversion is skipped when the column is already a Date, so
# re-running this chunk no longer turns every value into NA.
# Values that are not a parseable year become NA.
if (!inherits(data_dpt_clean$year, "Date")) {
  data_dpt_clean$year <- as.Date(
    paste0(data_dpt_clean$year, "-12-31"),
    format = "%Y-%m-%d"
  )
}

# Keep the departmental records for the years 1990 to 2012, then replace each
# department code by a region label and rename the column to `Region`.
# Codes that match none of the lists below end up as NA.
# NOTE(review): "PACA " carries a trailing space and "Poitou-CharenteS" an
# upper-case final S; both are kept as-is because `Region` is later used as a
# merge key against `eco` - confirm they match the labels used there.
dep <- data_dpt_clean %>%
  filter(between(year(year), 1990, 2012))

dep <- dep %>%
  mutate(
    department = case_when(
      department %in% c("95", "78", "91", "77", "94", "92", "93", "75") ~
        "Ile-de-France",
      department %in% c("08", "51", "10", "52") ~ "Champagne-Ardenne",
      department %in% c("02", "60", "80") ~ "Picardie",
      department %in% c("76", "27") ~ "Haute-normandie",
      department %in% c("18", "28", "36", "37", "41", "45") ~ "Centre",
      department %in% c("14", "50", "61") ~ "Basse-Normandie",
      department %in% c("21", "58", "71", "89") ~ "Bourgogne",
      department %in% c("59", "62") ~ "Nord-Pas-de-Calais",
      department %in% c("54", "55", "57", "88") ~ "Lorraine",
      department %in% c("67", "68") ~ "Alsace",
      department %in% c("25", "39", "70", "90") ~ "Franche-Comté",
      department %in% c("44", "49", "53", "72", "85") ~ "Pays de la Loire",
      department %in% c("22", "29", "35", "56") ~ "Bretagne",
      department %in% c("16", "17", "79", "86") ~ "Poitou-CharenteS",
      department %in% c("24", "33", "40", "47", "64") ~ "Aquitaine",
      department %in% c("09", "12", "31", "32", "46", "65", "81", "82") ~
        "Midi-Pyrenées",
      department %in% c("19", "23", "87") ~ "Limousin",
      department %in% c("01", "07", "26", "38", "42", "69", "73", "74") ~
        "Rhone-Alpes",
      department %in% c("03", "15", "43", "63") ~ "Auvergne",
      department %in% c("11", "30", "34", "48", "66") ~ "Languedoc-Roussillon",
      department %in% c("04", "05", "06", "13", "83", "84") ~ "PACA ",
      department %in% c("20") ~ "Corse",
      department %in% c("971", "972", "973", "974") ~ "Overseas territories"
    )
  ) %>%
  rename(Region = department)

# 1990: for each Region and each sex, keep the row(s) of `dep` with the highest
# `number`, then attach the columns of `eco` by Region.
# NOTE(review): max(number) is taken over individual rows of `dep`, not over
# counts summed per first name within a Region - confirm this is intended.
eco1990boy <- dep %>%
  filter(sex == 1, year(year) == 1990) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco1990boy <- merge(eco1990boy, eco, by = "Region")

eco1990girl <- dep %>%
  filter(sex == 2, year(year) == 1990) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco1990girl <- merge(eco1990girl, eco, by = "Region")

eco1990 <- bind_rows(eco1990boy, eco1990girl)

# Axis labels now match the mapped variables: x is the first name and y is the
# `X1990` column coming from `eco`.
# NOTE(review): `X1990` is assumed to hold the regional GDP for 1990 - confirm.
graph <- ggplot(eco1990boy, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 1990 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same chart for girls (the title previously said "boys").
graph <- ggplot(eco1990girl, aes(x = firstname, y = X1990, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 1990 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# 2000: for each Region and each sex, keep the row(s) of `dep` with the highest
# `number`, then attach the columns of `eco` by Region.
# NOTE(review): max(number) is taken over individual rows of `dep`, not over
# counts summed per first name within a Region - confirm this is intended.
eco2000boy <- dep %>%
  filter(sex == 1, year(year) == 2000) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2000boy <- merge(eco2000boy, eco, by = "Region")

eco2000girl <- dep %>%
  filter(sex == 2, year(year) == 2000) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2000girl <- merge(eco2000girl, eco, by = "Region")

eco2000 <- bind_rows(eco2000boy, eco2000girl)

# Axis labels now match the mapped variables: x is the first name and y is the
# `X2000` column coming from `eco`.
# NOTE(review): `X2000` is assumed to hold the regional GDP for 2000 - confirm.
graph <- ggplot(eco2000boy, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2000 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same chart for girls (the title previously said "boys").
graph <- ggplot(eco2000girl, aes(x = firstname, y = X2000, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2000 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# 2010: for each Region and each sex, keep the row(s) of `dep` with the highest
# `number`, then attach the columns of `eco` by Region.
# NOTE(review): max(number) is taken over individual rows of `dep`, not over
# counts summed per first name within a Region - confirm this is intended.
eco2010boy <- dep %>%
  filter(sex == 1, year(year) == 2010) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2010boy <- merge(eco2010boy, eco, by = "Region")

eco2010girl <- dep %>%
  filter(sex == 2, year(year) == 2010) %>%
  group_by(Region) %>%
  filter(number == max(number)) %>%
  ungroup()
eco2010girl <- merge(eco2010girl, eco, by = "Region")

eco2010 <- bind_rows(eco2010boy, eco2010girl)

# Axis labels now match the mapped variables: x is the first name and y is the
# `X2010` column coming from `eco`.
# NOTE(review): `X2010` is assumed to hold the regional GDP for 2010 - confirm.
graph <- ggplot(eco2010boy, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular boys firstnames in 2010 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph

# Same chart for girls (the title previously said "boys").
graph <- ggplot(eco2010girl, aes(x = firstname, y = X2010, color = Region)) +
  geom_point(size = 1.5) +
  ggtitle("Plot of more popular girls firstnames in 2010 by Region and GDP") +
  xlab("First name") + ylab("GDP") +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))
graph